{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from IPython.core.interactiveshell import InteractiveShell\n",
    "InteractiveShell.ast_node_interactivity = \"all\"\n",
    "\n",
    "import gc\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>order_id</th>\n",
       "      <th>user_id</th>\n",
       "      <th>eval_set</th>\n",
       "      <th>order_number</th>\n",
       "      <th>order_dow</th>\n",
       "      <th>order_hour_of_day</th>\n",
       "      <th>days_since_prior_order</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2539329</td>\n",
       "      <td>1</td>\n",
       "      <td>prior</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>8</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2398795</td>\n",
       "      <td>1</td>\n",
       "      <td>prior</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>7</td>\n",
       "      <td>15.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>473747</td>\n",
       "      <td>1</td>\n",
       "      <td>prior</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>12</td>\n",
       "      <td>21.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2254736</td>\n",
       "      <td>1</td>\n",
       "      <td>prior</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>7</td>\n",
       "      <td>29.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>431534</td>\n",
       "      <td>1</td>\n",
       "      <td>prior</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>15</td>\n",
       "      <td>28.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>3367565</td>\n",
       "      <td>1</td>\n",
       "      <td>prior</td>\n",
       "      <td>6</td>\n",
       "      <td>2</td>\n",
       "      <td>7</td>\n",
       "      <td>19.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>550135</td>\n",
       "      <td>1</td>\n",
       "      <td>prior</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>9</td>\n",
       "      <td>20.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>3108588</td>\n",
       "      <td>1</td>\n",
       "      <td>prior</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>14</td>\n",
       "      <td>14.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>2295261</td>\n",
       "      <td>1</td>\n",
       "      <td>prior</td>\n",
       "      <td>9</td>\n",
       "      <td>1</td>\n",
       "      <td>16</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>2550362</td>\n",
       "      <td>1</td>\n",
       "      <td>prior</td>\n",
       "      <td>10</td>\n",
       "      <td>4</td>\n",
       "      <td>8</td>\n",
       "      <td>30.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>1187899</td>\n",
       "      <td>1</td>\n",
       "      <td>train</td>\n",
       "      <td>11</td>\n",
       "      <td>4</td>\n",
       "      <td>8</td>\n",
       "      <td>14.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>2168274</td>\n",
       "      <td>2</td>\n",
       "      <td>prior</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>11</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>1501582</td>\n",
       "      <td>2</td>\n",
       "      <td>prior</td>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>10</td>\n",
       "      <td>10.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>1901567</td>\n",
       "      <td>2</td>\n",
       "      <td>prior</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>738281</td>\n",
       "      <td>2</td>\n",
       "      <td>prior</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>10</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>1673511</td>\n",
       "      <td>2</td>\n",
       "      <td>prior</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>11</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>1199898</td>\n",
       "      <td>2</td>\n",
       "      <td>prior</td>\n",
       "      <td>6</td>\n",
       "      <td>2</td>\n",
       "      <td>9</td>\n",
       "      <td>13.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>3194192</td>\n",
       "      <td>2</td>\n",
       "      <td>prior</td>\n",
       "      <td>7</td>\n",
       "      <td>2</td>\n",
       "      <td>12</td>\n",
       "      <td>14.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>788338</td>\n",
       "      <td>2</td>\n",
       "      <td>prior</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>15</td>\n",
       "      <td>27.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>1718559</td>\n",
       "      <td>2</td>\n",
       "      <td>prior</td>\n",
       "      <td>9</td>\n",
       "      <td>2</td>\n",
       "      <td>9</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    order_id  user_id eval_set  order_number  order_dow  order_hour_of_day  \\\n",
       "0    2539329        1    prior             1          2                  8   \n",
       "1    2398795        1    prior             2          3                  7   \n",
       "2     473747        1    prior             3          3                 12   \n",
       "3    2254736        1    prior             4          4                  7   \n",
       "4     431534        1    prior             5          4                 15   \n",
       "5    3367565        1    prior             6          2                  7   \n",
       "6     550135        1    prior             7          1                  9   \n",
       "7    3108588        1    prior             8          1                 14   \n",
       "8    2295261        1    prior             9          1                 16   \n",
       "9    2550362        1    prior            10          4                  8   \n",
       "10   1187899        1    train            11          4                  8   \n",
       "11   2168274        2    prior             1          2                 11   \n",
       "12   1501582        2    prior             2          5                 10   \n",
       "13   1901567        2    prior             3          1                 10   \n",
       "14    738281        2    prior             4          2                 10   \n",
       "15   1673511        2    prior             5          3                 11   \n",
       "16   1199898        2    prior             6          2                  9   \n",
       "17   3194192        2    prior             7          2                 12   \n",
       "18    788338        2    prior             8          1                 15   \n",
       "19   1718559        2    prior             9          2                  9   \n",
       "\n",
       "    days_since_prior_order  \n",
       "0                      NaN  \n",
       "1                     15.0  \n",
       "2                     21.0  \n",
       "3                     29.0  \n",
       "4                     28.0  \n",
       "5                     19.0  \n",
       "6                     20.0  \n",
       "7                     14.0  \n",
       "8                      0.0  \n",
       "9                     30.0  \n",
       "10                    14.0  \n",
       "11                     NaN  \n",
       "12                    10.0  \n",
       "13                     3.0  \n",
       "14                     8.0  \n",
       "15                     8.0  \n",
       "16                    13.0  \n",
       "17                    14.0  \n",
       "18                    27.0  \n",
       "19                     8.0  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user_id</th>\n",
       "      <th>order_number</th>\n",
       "      <th>days_since_prior_order</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>15.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>21.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>29.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>28.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>19.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>20.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>1</td>\n",
       "      <td>8</td>\n",
       "      <td>14.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>1</td>\n",
       "      <td>9</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "      <td>30.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>1</td>\n",
       "      <td>11</td>\n",
       "      <td>14.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>10.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>2</td>\n",
       "      <td>6</td>\n",
       "      <td>13.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>2</td>\n",
       "      <td>7</td>\n",
       "      <td>14.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>2</td>\n",
       "      <td>8</td>\n",
       "      <td>27.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>2</td>\n",
       "      <td>9</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    user_id  order_number  days_since_prior_order\n",
       "0         1             1                     NaN\n",
       "1         1             2                    15.0\n",
       "2         1             3                    21.0\n",
       "3         1             4                    29.0\n",
       "4         1             5                    28.0\n",
       "5         1             6                    19.0\n",
       "6         1             7                    20.0\n",
       "7         1             8                    14.0\n",
       "8         1             9                     0.0\n",
       "9         1            10                    30.0\n",
       "10        1            11                    14.0\n",
       "11        2             1                     NaN\n",
       "12        2             2                    10.0\n",
       "13        2             3                     3.0\n",
       "14        2             4                     8.0\n",
       "15        2             5                     8.0\n",
       "16        2             6                    13.0\n",
       "17        2             7                    14.0\n",
       "18        2             8                    27.0\n",
       "19        2             9                     8.0"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user_id</th>\n",
       "      <th>order_number</th>\n",
       "      <th>days_since_prior_order_comsum</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>15.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>36.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>65.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>93.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>112.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>132.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>1</td>\n",
       "      <td>8</td>\n",
       "      <td>146.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>1</td>\n",
       "      <td>9</td>\n",
       "      <td>146.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "      <td>176.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>1</td>\n",
       "      <td>11</td>\n",
       "      <td>190.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>10.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>13.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>21.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>29.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>2</td>\n",
       "      <td>6</td>\n",
       "      <td>42.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>2</td>\n",
       "      <td>7</td>\n",
       "      <td>56.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>2</td>\n",
       "      <td>8</td>\n",
       "      <td>83.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>2</td>\n",
       "      <td>9</td>\n",
       "      <td>91.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    user_id  order_number  days_since_prior_order_comsum\n",
       "0         1             1                            NaN\n",
       "1         1             2                           15.0\n",
       "2         1             3                           36.0\n",
       "3         1             4                           65.0\n",
       "4         1             5                           93.0\n",
       "5         1             6                          112.0\n",
       "6         1             7                          132.0\n",
       "7         1             8                          146.0\n",
       "8         1             9                          146.0\n",
       "9         1            10                          176.0\n",
       "10        1            11                          190.0\n",
       "11        2             1                            NaN\n",
       "12        2             2                           10.0\n",
       "13        2             3                           13.0\n",
       "14        2             4                           21.0\n",
       "15        2             5                           29.0\n",
       "16        2             6                           42.0\n",
       "17        2             7                           56.0\n",
       "18        2             8                           83.0\n",
       "19        2             9                           91.0"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#  python3 orders_comsum.py\n",
    "\n",
    "import gc\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os\n",
    "import json\n",
    "import sklearn.metrics\n",
    "from sklearn.metrics import f1_score\n",
    "from sklearn.model_selection import train_test_split\n",
    "from scipy.sparse import dok_matrix, coo_matrix\n",
    "from sklearn.utils.multiclass import  type_of_target\n",
    "\n",
    "\n",
    "if __name__ == '__main__':\n",
    "    data_path = \"data\"\n",
    "    path = \"data\"\n",
    "\n",
    "#     aisles = pd.read_csv(os.path.join(path, \"aisles.csv\"), dtype={'aisle_id': np.uint8, 'aisle': 'category'})\n",
    "#     departments = pd.read_csv(os.path.join(path, \"departments.csv\"),\n",
    "#                               dtype={'department_id': np.uint8, 'department': 'category'})\n",
    "#     order_prior = pd.read_csv(os.path.join(path, \"order_products__prior.csv\"), dtype={'order_id': np.uint32,\n",
    "#                                                                                       'product_id': np.uint16,\n",
    "#                                                                                       'add_to_cart_order': np.uint8,\n",
    "#                                                                                       'reordered': bool})\n",
    "#     order_train = pd.read_csv(os.path.join(path, \"order_products__train.csv\"), dtype={'order_id': np.uint32,\n",
    "#                                                                                       'product_id': np.uint16,\n",
    "#                                                                                       'add_to_cart_order': np.uint8,\n",
    "#                                                                                       'reordered': bool})\n",
    "#     orders = pd.read_csv(os.path.join(path, \"orders.csv\"), dtype={'order_id': np.uint32,\n",
    "#                                                                   'user_id': np.uint32,\n",
    "#                                                                   'eval_set': 'category',\n",
    "#                                                                   'order_number': np.uint8,\n",
    "#                                                                   'order_dow': np.uint8,\n",
    "#                                                                   'order_hour_of_day': np.uint8\n",
    "#                                                                   })\n",
    "\n",
    "#     products = pd.read_csv(os.path.join(path, \"products.csv\"), dtype={'product_id': np.uint16,\n",
    "#                                                                       'aisle_id': np.uint8,\n",
    "#                                                                       'department_id': np.uint8})\n",
    "\n",
    "#     labels = pd.read_pickle(os.path.join(data_path, 'chunk_0.pkl'))\n",
    "#     user_product = pd.read_pickle(os.path.join(data_path, 'previous_products.pkl'))\n",
    "\n",
    "    orders.head(20)\n",
    "    \n",
    "    tmp = orders[['user_id', 'order_number', 'days_since_prior_order']].head(10000)\n",
    "    tmp.head(20)\n",
    "\n",
    "    order_comsum = orders[['user_id', 'order_number', 'days_since_prior_order']].groupby(['user_id', 'order_number'])\\\n",
    "            ['days_since_prior_order'].sum().groupby(level=[0]).cumsum().reset_index().rename(columns={'days_since_prior_order':'days_since_prior_order_comsum'})\n",
    "    order_comsum.head(20)\n",
    "        \n",
    "    order_comsum['days_since_prior_order_comsum'].fillna(0, inplace=True)\n",
    "#     order_comsum.to_pickle('data/orders_comsum.pkl')\n",
    "\n",
    "    order_comsum = pd.merge(order_comsum, orders, on=['user_id', 'order_number'])[['user_id', 'order_number', 'days_since_prior_order_comsum', 'order_id']]\n",
    "\n",
    "    order_product = pd.merge(order_prior, orders, on='order_id')[['order_id', 'product_id', 'eval_set']]\n",
    "    order_product_train_test = labels[['order_id', 'product_id', 'eval_set']]\n",
    "\n",
    "    order_product = pd.concat([order_product, order_product_train_test])\n",
    "\n",
    "    order_product = pd.merge(order_product, order_comsum, on='order_id')\n",
    "\n",
    "    print(order_product.columns)\n",
    "\n",
    "    order_product = pd.merge(order_product, user_product, on=['user_id', 'product_id'])\n",
    "\n",
    "    temp = order_product.groupby(['user_id', 'product_id', 'order_number'])['days_since_prior_order_comsum'].sum().groupby(level=[0, 1]).apply(lambda x: np.diff(np.nan_to_num(x)))\n",
    "    temp = temp.to_frame('periods').reset_index()\n",
    "\n",
    "#     temp.to_pickle('data/product_period.pkl')\n",
    "\n",
    "    aggregated = temp.copy()\n",
    "    aggregated['last'] = aggregated.periods.apply(lambda x: x[-1])\n",
    "    aggregated['prev1'] = aggregated.periods.apply(lambda x: x[-2] if len(x) > 1 else np.nan)\n",
    "    aggregated['prev2'] = aggregated.periods.apply(lambda x: x[-3] if len(x) > 2 else np.nan)\n",
    "    aggregated['median'] = aggregated.periods.apply(lambda x: np.median(x[:-1]))\n",
    "    aggregated['mean'] = aggregated.periods.apply(lambda x: np.mean(x[:-1]))\n",
    "    aggregated.drop('periods', axis=1, inplace=True)\n",
    "\n",
    "#     aggregated.to_pickle('data/product_periods_stat.pkl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
